# Draw three density graphs for the HARP 2.0 sample.
#  HARP LTV, PredLTV, and low-FICO sample PredLTV

w <- 10
bin_size <- 0.25

barfill <- "#4271AE"
barlines <- "#1F3552"

print(paste("w =", w))
print(paste("bin_size =", bin_size))

library(tidyverse)
library(haven)
library(magrittr)
library(lubridate)
library(grid)
library(gridExtra)

rootdir <- dirname(getwd())
scratchdir <- file.path(rootdir, "data", "scratch_data")
tempdir <- file.path(scratchdir, "temp")
outdir <- file.path(rootdir, "TEX", "Figures")

harp <- read_dta(file.path(tempdir, "gse_id-harp-cleaned.dta"))

harp %<>% mutate(date = as.Date(date, "%d%b%Y"))
post_2012 <- harp %>% filter(year(date) >= 2012)

sample <- post_2012 %>%
  mutate(z = ltvprev - 105.01) %>%
  filter(z >= -w & z < w) %>%
  mutate(z2 = z + 105) %>%
  mutate(z_bin = floor(z/bin_size) * bin_size + bin_size/2) %>%
  mutate(z_bin = z_bin + 105) 

med_credit_scorecore <- median(sample$credit_scorecore_b, na.rm = TRUE)
print(paste("Median credit score:", med_credit_scorecore))

low_sample <- sample %>% filter(credit_scorecore_b <= med_credit_scorecore & !is.na(credit_scorecore_b))

# Actual ltv
foo <- post_2012 %>%
  filter(oltv >= (105 - w) & oltv < (105 + w))

p1 <- ggplot(foo) +
  stat_count(mapping = aes(x = oltv, y = ..prop.., group = 1), width = 1,
           color = barlines, fill = barfill) +
  xlab("HARP LTV") + ylab("") + 
  ggtitle("(a) HARP LTV") +
  theme(plot.title = element_text(hjust = 0.5))

p2 <- ggplot(sample, aes(x = z2, y = ..count../sum(..count..))) +
  stat_bin(binwidth = bin_size, closed = "left", boundary = 105,
           color = barlines, fill = barfill, lwd = 0.3) +
  xlab("MaxLTV") + ylab("") +
  ggtitle("(b) PredLTV") +
  theme(plot.title = element_text(hjust = 0.5))

p3 <- ggplot(low_sample, aes(x = z2, y = ..count../sum(..count..))) +
  stat_bin(binwidth = bin_size, closed = "left", boundary = 105,
           color = barlines, fill = barfill, lwd = 0.3) +
  xlab("MaxLTV") + ylab("") +
  ggtitle("(c) Low Credit Score") +
  theme(plot.title = element_text(hjust = 0.5))

pdf(file.path(outdir, "20190923_harp_density.pdf"), width = 8, height = 7)
grid.arrange(p1, p2, p3, nrow = 2)
dev.off()

# Separate files
ggplot(foo) +
  stat_count(mapping = aes(x = oltv, y = ..prop.., group = 1), width = 1,
             color = barlines, fill = barfill) +
  xlab("HARP LTV") + theme_bw() + 
  theme(axis.title.y = element_blank(),
        plot.background = element_rect(fill = "gray92")) +
  ggsave(file.path(outdir, "20190923_harp_density_actual.pdf"),
         width = 5, height = 4)

ggplot(sample, aes(x = z2, y = ..count../sum(..count..))) +
  stat_bin(binwidth = bin_size, closed = "left", boundary = 105,
           color = barlines, fill = barfill, lwd = 0.3) +
  xlab("MaxLTV") + theme_bw() + 
  theme(axis.title.y = element_blank(),
        plot.background = element_rect(fill = "gray92")) +
  ggsave(file.path(outdir, "20190923_harp_density_predltv.pdf"),
         width = 5, height = 4)

ggplot(low_sample, aes(x = z2, y = ..count../sum(..count..))) +
  stat_bin(binwidth = bin_size, closed = "left", boundary = 105,
           color = barlines, fill = barfill, lwd = 0.3) +
  xlab("MaxLTV") + theme_bw() + 
  theme(axis.title.y = element_blank(),
        plot.background = element_rect(fill = "gray92")) +
  ggsave(file.path(outdir, "20190923_harp_density_predltv_lowcredit_score.pdf"),
         width = 5, height = 4)
